{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Modules and Packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import pickle,csv\n",
    "from dataloader import get_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* [Python Standard Library](https://docs.python.org/3/library/) - Python runtime services,Generic Operating System,  Services, Debugging \n",
    "* Numpy, Matplotlib\n",
    "* Pytorch, Tensorflow"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Sources  and Common data store formats"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Python objects - pkl \n",
    "* Numeric data - npz \n",
    "* Multi-data - csv \n",
    "* Plain text - txt \n",
    "* Large Datasets - HDF5 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'age': 23, 'hobbies': ['photography', 'running', 'travelling']}"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pickle\n",
    "obj = { 'age':23,'hobbies':['photography','running','travelling'] }\n",
    "pickle.dump(obj,open('store.pkl','wb'))\n",
    "\n",
    "obj2 = pickle.load(open('store.pkl','rb'))\n",
    "obj2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']\n",
      "['5.1', '3.5', '1.4', '0.2', 'setosa']\n",
      "['4.9', '3', '1.4', '0.2', 'setosa']\n",
      "['4.7', '3.2', '1.3', '0.2', 'setosa']\n",
      "['4.6', '3.1', '1.5', '0.2', 'setosa']\n",
      "['5', '3.6', '1.4', '0.2', 'setosa']\n",
      "['5.4', '3.9', '1.7', '0.4', 'setosa']\n",
      "['4.6', '3.4', '1.4', '0.3', 'setosa']\n",
      "['5', '3.4', '1.5', '0.2', 'setosa']\n",
      "['4.4', '2.9', '1.4', '0.2', 'setosa']\n",
      "['4.9', '3.1', '1.5', '0.1', 'setosa']\n",
      "['5.4', '3.7', '1.5', '0.2', 'setosa']\n",
      "['4.8', '3.4', '1.6', '0.2', 'setosa']\n",
      "['4.8', '3', '1.4', '0.1', 'setosa']\n",
      "['4.3', '3', '1.1', '0.1', 'setosa']\n",
      "['5.8', '4', '1.2', '0.2', 'setosa']\n",
      "['5.7', '4.4', '1.5', '0.4', 'setosa']\n",
      "['5.4', '3.9', '1.3', '0.4', 'setosa']\n",
      "['5.1', '3.5', '1.4', '0.3', 'setosa']\n",
      "['5.7', '3.8', '1.7', '0.3', 'setosa']\n",
      "['5.1', '3.8', '1.5', '0.3', 'setosa']\n",
      "['5.4', '3.4', '1.7', '0.2', 'setosa']\n",
      "['5.1', '3.7', '1.5', '0.4', 'setosa']\n",
      "['4.6', '3.6', '1', '0.2', 'setosa']\n",
      "['5.1', '3.3', '1.7', '0.5', 'setosa']\n",
      "['4.8', '3.4', '1.9', '0.2', 'setosa']\n",
      "['5', '3', '1.6', '0.2', 'setosa']\n",
      "['5', '3.4', '1.6', '0.4', 'setosa']\n",
      "['5.2', '3.5', '1.5', '0.2', 'setosa']\n",
      "['5.2', '3.4', '1.4', '0.2', 'setosa']\n",
      "['4.7', '3.2', '1.6', '0.2', 'setosa']\n",
      "['4.8', '3.1', '1.6', '0.2', 'setosa']\n",
      "['5.4', '3.4', '1.5', '0.4', 'setosa']\n",
      "['5.2', '4.1', '1.5', '0.1', 'setosa']\n",
      "['5.5', '4.2', '1.4', '0.2', 'setosa']\n",
      "['4.9', '3.1', '1.5', '0.1', 'setosa']\n",
      "['5', '3.2', '1.2', '0.2', 'setosa']\n",
      "['5.5', '3.5', '1.3', '0.2', 'setosa']\n",
      "['4.9', '3.1', '1.5', '0.1', 'setosa']\n",
      "['4.4', '3', '1.3', '0.2', 'setosa']\n",
      "['5.1', '3.4', '1.5', '0.2', 'setosa']\n",
      "['5', '3.5', '1.3', '0.3', 'setosa']\n",
      "['4.5', '2.3', '1.3', '0.3', 'setosa']\n",
      "['4.4', '3.2', '1.3', '0.2', 'setosa']\n",
      "['5', '3.5', '1.6', '0.6', 'setosa']\n",
      "['5.1', '3.8', '1.9', '0.4', 'setosa']\n",
      "['4.8', '3', '1.4', '0.3', 'setosa']\n",
      "['5.1', '3.8', '1.6', '0.2', 'setosa']\n",
      "['4.6', '3.2', '1.4', '0.2', 'setosa']\n",
      "['5.3', '3.7', '1.5', '0.2', 'setosa']\n",
      "['5', '3.3', '1.4', '0.2', 'setosa']\n",
      "['7', '3.2', '4.7', '1.4', 'versicolor']\n",
      "['6.4', '3.2', '4.5', '1.5', 'versicolor']\n",
      "['6.9', '3.1', '4.9', '1.5', 'versicolor']\n",
      "['5.5', '2.3', '4', '1.3', 'versicolor']\n",
      "['6.5', '2.8', '4.6', '1.5', 'versicolor']\n",
      "['5.7', '2.8', '4.5', '1.3', 'versicolor']\n",
      "['6.3', '3.3', '4.7', '1.6', 'versicolor']\n",
      "['4.9', '2.4', '3.3', '1', 'versicolor']\n",
      "['6.6', '2.9', '4.6', '1.3', 'versicolor']\n",
      "['5.2', '2.7', '3.9', '1.4', 'versicolor']\n",
      "['5', '2', '3.5', '1', 'versicolor']\n",
      "['5.9', '3', '4.2', '1.5', 'versicolor']\n",
      "['6', '2.2', '4', '1', 'versicolor']\n",
      "['6.1', '2.9', '4.7', '1.4', 'versicolor']\n",
      "['5.6', '2.9', '3.6', '1.3', 'versicolor']\n",
      "['6.7', '3.1', '4.4', '1.4', 'versicolor']\n",
      "['5.6', '3', '4.5', '1.5', 'versicolor']\n",
      "['5.8', '2.7', '4.1', '1', 'versicolor']\n",
      "['6.2', '2.2', '4.5', '1.5', 'versicolor']\n",
      "['5.6', '2.5', '3.9', '1.1', 'versicolor']\n",
      "['5.9', '3.2', '4.8', '1.8', 'versicolor']\n",
      "['6.1', '2.8', '4', '1.3', 'versicolor']\n",
      "['6.3', '2.5', '4.9', '1.5', 'versicolor']\n",
      "['6.1', '2.8', '4.7', '1.2', 'versicolor']\n",
      "['6.4', '2.9', '4.3', '1.3', 'versicolor']\n",
      "['6.6', '3', '4.4', '1.4', 'versicolor']\n",
      "['6.8', '2.8', '4.8', '1.4', 'versicolor']\n",
      "['6.7', '3', '5', '1.7', 'versicolor']\n",
      "['6', '2.9', '4.5', '1.5', 'versicolor']\n",
      "['5.7', '2.6', '3.5', '1', 'versicolor']\n",
      "['5.5', '2.4', '3.8', '1.1', 'versicolor']\n",
      "['5.5', '2.4', '3.7', '1', 'versicolor']\n",
      "['5.8', '2.7', '3.9', '1.2', 'versicolor']\n",
      "['6', '2.7', '5.1', '1.6', 'versicolor']\n",
      "['5.4', '3', '4.5', '1.5', 'versicolor']\n",
      "['6', '3.4', '4.5', '1.6', 'versicolor']\n",
      "['6.7', '3.1', '4.7', '1.5', 'versicolor']\n",
      "['6.3', '2.3', '4.4', '1.3', 'versicolor']\n",
      "['5.6', '3', '4.1', '1.3', 'versicolor']\n",
      "['5.5', '2.5', '4', '1.3', 'versicolor']\n",
      "['5.5', '2.6', '4.4', '1.2', 'versicolor']\n",
      "['6.1', '3', '4.6', '1.4', 'versicolor']\n",
      "['5.8', '2.6', '4', '1.2', 'versicolor']\n",
      "['5', '2.3', '3.3', '1', 'versicolor']\n",
      "['5.6', '2.7', '4.2', '1.3', 'versicolor']\n",
      "['5.7', '3', '4.2', '1.2', 'versicolor']\n",
      "['5.7', '2.9', '4.2', '1.3', 'versicolor']\n",
      "['6.2', '2.9', '4.3', '1.3', 'versicolor']\n",
      "['5.1', '2.5', '3', '1.1', 'versicolor']\n",
      "['5.7', '2.8', '4.1', '1.3', 'versicolor']\n",
      "['6.3', '3.3', '6', '2.5', 'virginica']\n",
      "['5.8', '2.7', '5.1', '1.9', 'virginica']\n",
      "['7.1', '3', '5.9', '2.1', 'virginica']\n",
      "['6.3', '2.9', '5.6', '1.8', 'virginica']\n",
      "['6.5', '3', '5.8', '2.2', 'virginica']\n",
      "['7.6', '3', '6.6', '2.1', 'virginica']\n",
      "['4.9', '2.5', '4.5', '1.7', 'virginica']\n",
      "['7.3', '2.9', '6.3', '1.8', 'virginica']\n",
      "['6.7', '2.5', '5.8', '1.8', 'virginica']\n",
      "['7.2', '3.6', '6.1', '2.5', 'virginica']\n",
      "['6.5', '3.2', '5.1', '2', 'virginica']\n",
      "['6.4', '2.7', '5.3', '1.9', 'virginica']\n",
      "['6.8', '3', '5.5', '2.1', 'virginica']\n",
      "['5.7', '2.5', '5', '2', 'virginica']\n",
      "['5.8', '2.8', '5.1', '2.4', 'virginica']\n",
      "['6.4', '3.2', '5.3', '2.3', 'virginica']\n",
      "['6.5', '3', '5.5', '1.8', 'virginica']\n",
      "['7.7', '3.8', '6.7', '2.2', 'virginica']\n",
      "['7.7', '2.6', '6.9', '2.3', 'virginica']\n",
      "['6', '2.2', '5', '1.5', 'virginica']\n",
      "['6.9', '3.2', '5.7', '2.3', 'virginica']\n",
      "['5.6', '2.8', '4.9', '2', 'virginica']\n",
      "['7.7', '2.8', '6.7', '2', 'virginica']\n",
      "['6.3', '2.7', '4.9', '1.8', 'virginica']\n",
      "['6.7', '3.3', '5.7', '2.1', 'virginica']\n",
      "['7.2', '3.2', '6', '1.8', 'virginica']\n",
      "['6.2', '2.8', '4.8', '1.8', 'virginica']\n",
      "['6.1', '3', '4.9', '1.8', 'virginica']\n",
      "['6.4', '2.8', '5.6', '2.1', 'virginica']\n",
      "['7.2', '3', '5.8', '1.6', 'virginica']\n",
      "['7.4', '2.8', '6.1', '1.9', 'virginica']\n",
      "['7.9', '3.8', '6.4', '2', 'virginica']\n",
      "['6.4', '2.8', '5.6', '2.2', 'virginica']\n",
      "['6.3', '2.8', '5.1', '1.5', 'virginica']\n",
      "['6.1', '2.6', '5.6', '1.4', 'virginica']\n",
      "['7.7', '3', '6.1', '2.3', 'virginica']\n",
      "['6.3', '3.4', '5.6', '2.4', 'virginica']\n",
      "['6.4', '3.1', '5.5', '1.8', 'virginica']\n",
      "['6', '3', '4.8', '1.8', 'virginica']\n",
      "['6.9', '3.1', '5.4', '2.1', 'virginica']\n",
      "['6.7', '3.1', '5.6', '2.4', 'virginica']\n",
      "['6.9', '3.1', '5.1', '2.3', 'virginica']\n",
      "['5.8', '2.7', '5.1', '1.9', 'virginica']\n",
      "['6.8', '3.2', '5.9', '2.3', 'virginica']\n",
      "['6.7', '3.3', '5.7', '2.5', 'virginica']\n",
      "['6.7', '3', '5.2', '2.3', 'virginica']\n",
      "['6.3', '2.5', '5', '1.9', 'virginica']\n",
      "['6.5', '3', '5.2', '2', 'virginica']\n",
      "['6.2', '3.4', '5.4', '2.3', 'virginica']\n",
      "['5.9', '3', '5.1', '1.8', 'virginica']\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import pprint\n",
    "with open('data/iris.csv', 'r') as csvfile:\n",
    "    reader = csv.reader(csvfile, delimiter=',')\n",
    "    for row in reader:\n",
    "        print(row)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Containers\n",
    "\n",
    "Python offers a variety of containers each dedicated for different purpose and constrained to harness certain optimisations\n",
    "* lists - generic container , numeric indexing\n",
    "* tuples - immutable lists \n",
    "* dictionaries - key-value organisation \n",
    "* sets - collection of unique elements\n",
    "\n",
    "## Lists\n",
    "Pay attention as these are techniques to handle data pre-processing and manipulation in the batch loading phase "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n"
     ]
    }
   ],
   "source": [
    "homo_list = [12,45,900,78,34,66,17,85]\n",
    "hetero_list = [10,'foo',1.3]\n",
    "print(hetero_list[0])\n",
    "tuple_list = [\n",
    "                (1,'Erebor',800.45),\n",
    "                (2,'Rivendell',500.67),\n",
    "                (3,'Shire',900.12),\n",
    "                (4,'Mordor',1112.30)\n",
    "            ]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Note** : Lists in batched data processing are particularly lists of tuples  \n",
    "batch_instance = (utter,utterance_length,transcript,transcript_lens)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Operations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[12, 45, 900, 78, 34, 66, 17, 85, 12, 45, 900, 78, 34, 66, 17, 85]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "l3 = homo_list * 2\n",
    "l3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is different from the result you'd get when operating on numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[12, 45, 900, 78, 34, 66, 17, 85, 10, 'foo', 1.3]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "homo_list + hetero_list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`sorted`, `sum`,`max`,`min`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[12, 17, 34, 45, 66, 78, 85, 900]\n",
      "1237\n"
     ]
    }
   ],
   "source": [
    "print(sorted(homo_list))\n",
    "print(sum(homo_list))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  Conditional operations - filtering:\n",
    "There are two ways to filter lists:\n",
    "* Index based - Slicing and Dicing\n",
    "* Condition based - List comprehension"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Slicing and Dicing\n",
    "` sliced_list = [ start_idx : end_idx+1 : step]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[12, 45, 900, 78, 34, 66, 17, 85]\n",
      "[12, 45, 900, 78, 34]\n",
      "[85, 66, 78, 45]\n"
     ]
    }
   ],
   "source": [
    "print(homo_list)\n",
    "print(homo_list[:5])\n",
    "print(homo_list[-1:0:-2])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### List comprehension\n",
    "\n",
    "`*result*  = [*transform*    *iteration*         *filter*     ]` \n",
    "~~~~\n",
    "res = [ manipulation(instance[2]) for instance in sorted_dataset ]\n",
    "~~~~"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[900, 78, 66, 85]"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res = [no for no in homo_list if no>50]\n",
    "res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.28 ms ± 29.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit  \n",
    "res = [i for i in range(10000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.23 ms ± 88.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "res = []\n",
    "for i in range(10000):\n",
    "    res.append(i)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Usecase: Data Preprocessing and Loading\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'tuple'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(array([[ -2.7760592 , -10.653754  ,  -9.3995695 , ...,   0.2363553 ,\n",
       "          -0.5805931 ,  -0.8171587 ],\n",
       "        [ -2.2426343 ,  -9.265765  ,  -9.315787  , ...,  -0.26111507,\n",
       "          -0.46208572,  -0.9445448 ],\n",
       "        [ -2.7435112 ,  -6.7105646 , -11.795384  , ...,  -0.6318717 ,\n",
       "          -0.56550837,  -1.3585529 ],\n",
       "        ...,\n",
       "        [ -6.937312  , -19.204508  , -24.954329  , ...,  -1.6914577 ,\n",
       "          -1.5678849 ,  -1.5754833 ],\n",
       "        [ -6.4351797 , -18.217642  , -20.86373   , ...,  -2.0262208 ,\n",
       "          -1.7305894 ,  -1.2664866 ],\n",
       "        [ -5.921312  , -16.417336  , -19.451906  , ...,  -2.7078733 ,\n",
       "          -2.3750868 ,  -2.1822453 ]], dtype=float32),\n",
       " 402,\n",
       " array([25, 13, 10,  1, 11, 10, 18,  6, 17, 10,  1, 21, 23, 20,  9, 26,  8,\n",
       "        10, 24,  1,  6,  1, 17, 14, 25, 25, 10, 23,  1, 20, 11,  1, 25, 28,\n",
       "        20,  1, 25, 20,  1, 11, 20, 26, 23,  1, 30, 20, 26, 19, 12,  1, 14,\n",
       "        19,  1, 19, 20, 27, 10, 18,  7, 10, 23,  1,  6, 19,  9,  1,  9, 10,\n",
       "         8, 10, 18,  7, 10, 23]),\n",
       " 74)"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "batch_dataset = get_data()\n",
    "print(type(batch_dataset[0]))\n",
    "\n",
    "## (utterance,utterance_size,transcripts,transcripts_size)\n",
    "batch_dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([25, 13, 10,  1, 21, 20, 21, 26, 17,  6, 25, 14, 20, 19,  1, 17, 14,\n",
       "       27, 10, 24,  1,  7, 30,  1, 13, 10, 23,  9, 14, 19, 12,  1, 12, 20,\n",
       "        6, 25, 24,  1,  6, 19,  9,  1, 24, 13, 10, 10, 21,  1, 20, 23,  1,\n",
       "        7, 30,  1, 25, 23,  6,  9, 14, 19, 12])"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sorting\n",
    "sorted_dataset = sorted(batch_dataset,key=lambda x: x[1])\n",
    "\n",
    "# max\n",
    "max_transcript_len = max(batch_dataset,key=lambda x: x[3] )[3]\n",
    "\n",
    "#list comprehension for extraction\n",
    "transcripts = [ (instance[2],instance[3]) for instance in sorted_dataset]\n",
    "\n",
    "#list comprehension for manipulation \n",
    "\"\"\"\n",
    "Returns transpose of matrix\n",
    "\"\"\"    \n",
    "def manipulation(data):\n",
    "    return data.T\n",
    "\n",
    "pad_len = [ manipulation(instance[2]) for instance in sorted_dataset ]\n",
    "pad_len[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classes\n",
    "\n",
    "Specifically useful for datasets that are supposed to be 'iterable'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Iterable and Iterators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "class IterableADT:\n",
    "    \n",
    "    def __init__(self,train_data_src,train_data_src2, train_label_src):\n",
    "        self.x = train_data_src\n",
    "        self.x2 = train_data_src2\n",
    "        self.y = train_label_src\n",
    "        assert len(self.x) == len(self.x2)\n",
    "        assert len(self.x2) == len(self.y)\n",
    "    \n",
    "    def __len__(self):\n",
    "        return len(self.x)\n",
    "\n",
    "    def __getitem__(self,key):\n",
    "        return (self.x[key],self.x2[key],self.y[key])\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generators\n",
    "Instead of creating classes for iterators , you can use the generator \n",
    "Generators relieve the developer of recording the state of the iteration \n",
    "Simplistically, generators are functions that use `yield` statement instead of `return`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 'one')\n",
      "(2, 'two')\n",
      "(3, 'three')\n",
      "(4, 'four')\n",
      "(5, 'five')\n"
     ]
    }
   ],
   "source": [
    "def pairwise_generator(input_data):\n",
    "    for i in range(0,len(input_data),2):\n",
    "        yield (input_data[i],input_data[i+1])\n",
    "\n",
    "data = [1,'one',2,'two',3,'three',4,'four',5,'five']        \n",
    "generator = pairwise_generator(data)\n",
    "for elt in generator:\n",
    "    print(elt)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Debugging - Pdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "> <ipython-input-62-327e211f3ec6>(4)pairwise_generator()\n",
      "-> for i in range(0,len(data),2):\n",
      "(Pdb) n\n",
      "> <ipython-input-62-327e211f3ec6>(5)pairwise_generator()\n",
      "-> yield (input_data[i],input_data[i+1])\n",
      "(Pdb) n\n",
      "(1, 'one')\n",
      "> <ipython-input-62-327e211f3ec6>(4)pairwise_generator()\n",
      "-> for i in range(0,len(data),2):\n",
      "(Pdb) elt\n",
      "(1, 'one')\n",
      "(Pdb) input_data[i]\n",
      "1\n",
      "(Pdb) n\n",
      "> <ipython-input-62-327e211f3ec6>(5)pairwise_generator()\n",
      "-> yield (input_data[i],input_data[i+1])\n",
      "(Pdb) input_data[i]\n",
      "2\n",
      "(Pdb) n\n",
      "(2, 'two')\n",
      "> <ipython-input-62-327e211f3ec6>(4)pairwise_generator()\n",
      "-> for i in range(0,len(data),2):\n",
      "(Pdb) elt\n",
      "(2, 'two')\n",
      "(Pdb) n\n",
      "> <ipython-input-62-327e211f3ec6>(5)pairwise_generator()\n",
      "-> yield (input_data[i],input_data[i+1])\n",
      "(Pdb) n\n",
      "(3, 'three')\n",
      "> <ipython-input-62-327e211f3ec6>(4)pairwise_generator()\n",
      "-> for i in range(0,len(data),2):\n",
      "(Pdb) n\n",
      "> <ipython-input-62-327e211f3ec6>(5)pairwise_generator()\n",
      "-> yield (input_data[i],input_data[i+1])\n",
      "(Pdb) c\n",
      "(4, 'four')\n"
     ]
    }
   ],
   "source": [
    "import pdb\n",
    "def pairwise_generator(input_data):\n",
    "    pdb.set_trace()\n",
    "    for i in range(0,len(input_data),2):        \n",
    "        yield (input_data[i],input_data[i+1])\n",
    "        \n",
    "data = [1,'one',2,'two',3,'three',4,'four']        \n",
    "generator = pairwise_generator(data)\n",
    "for elt in generator:\n",
    "    print(elt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Py35",
   "language": "python",
   "name": "py35"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
